# Access Biological Data with {gmRi}
# nmfs_bio <- gmRi::gmri_survdat_prep(survdat_source = "bio")
withr::with_dir(rprojroot::find_root('_targets.R'),
tar_load(survdat_biological))
# Core cleanup:
# Make 5 year increments
bio_5yr_prep <- function(survdat_biological){
# Drop NA age data, restirct to summer and fall
nmfs_bio <- survdat_biological %>%
filter(is.na(age) == FALSE,
season %in% c("Summer", "Fall"))
yr5_breaks <- seq(1970, 2020, by = 5)
yr5_ends <- seq(1974, 2024, by = 5)
yr5_labels <- str_c(yr5_breaks, "-", yr5_ends)
yr5_labels <- yr5_labels[1:(length(yr5_labels)-1)]
#Add 5-year groups, decade labels
nmfs_bio <- nmfs_bio %>%
mutate(
yearclass = est_year - (age-1),
five_yr_group = cut(
est_year,
breaks = yr5_breaks,
labels = yr5_labels,
include.lowest = TRUE,
ordered_result = TRUE),
decade = floor_decade(est_year)) %>%
as.data.frame()
}
# run cleanup
nmfs_bio <- bio_5yr_prep(survdat_biological = survdat_biological)
In the original proposal it was stated that 17 species would be used, species that are regularly measured and aged. Ordering all species by the number aged we can pull out the following top species:
#### Data Prep ####
# Rank species by how many measurements there are
species_abunds <- nmfs_bio %>%
count(comname) %>%
arrange(desc(n)) # ordered by number measured
# Reorder alphabetically
vonbert_species <- sort(species_abunds$comname[1:17])
These will be the species that we assess length-at-age characteristics for: acadian redfish, american plaice, atlantic cod, atlantic herring, black sea bass, butterfish, goosefish, haddock, pollock, red hake, scup, silver hake, summer flounder, white hake, winter flounder, witch flounder and yellowtail flounder
We also only care about data from the two main survey seasons, so we limit the data to just those seasons before splitting it out into lists for each species. The lists contain all data for a given species, broken into groups at a set interval of time. In this case it is five year increments.
# Name the list so it carries through
names(vonbert_species) <- vonbert_species
# Does not work for because of bad start points or no age data:
# skates, dogfish, fourspot flounder, goosefish
##### Pull species and make group id for later
species_data <- map(vonbert_species, function(species_name){
# Drop NA ages, set increment labels
nmfs_bio %>%
filter(comname == species_name) %>%
mutate(group_id = str_c(est_year, comname, sep = "-"))
})
The availability of data for fitting growth curves varies by species & sex for the different seasons and years of the survey. Some species are also much longer lived and have a longer age range that may be difficult to accurately fit without good representation in the data
# check crosstable
#xtabs(~yearclass + age, bind_rows(species_data, .id = "common name"))
bind_rows(species_data, .id = "common name") %>%
group_by(comname, catchsex, decade) %>%
summarise(
number_aged = n(),
ages_measured = n_distinct(age),
min_age = min(age),
max_age = max(age),
age_range = max_age - min_age,
num_per_age = number_aged / age_range,
.groups = "drop") %>%
mutate(across(where(is.numeric), round, 1),
age_range = NULL) %>%
setNames(
c("Common Name", "Sex", "Decade", "# Aged", "# of Ages", "Age Min", "Age Max", "Avg. Fish / Age")
) %>% DT::datatable()
# bind_rows(species_data, .id = "common name") %>%
# count(comname, est_year, yearclass, age) %>%
# ggplot(aes(est_year, age, size = n, color = yearclass)) +
# geom_point() +
# scale_color_gmri(discrete = FALSE) +
# labs(x = "Year", y = "Age", size = "Number Aged", color = "Cohort Year") +
# facet_wrap(~comname, ncol = 3) +
# guides(color = guide_colorbar(title.position = "top", title.hjust = 0.5),
# size = guide_legend(title.position = "top", title.hjust = 0.5))
The Von-Bertalanffy parameterization used is the “Typical” von Bertalanffy parameterization, also known as the Beverton-Holt parameterization, which is implemented using the {FSA} package.
We can check that this is the case by displaying the equations:
# "typical" von bert
typical <- growthFunShow(param = "Typical", "vonBertalanffy")
typical_plot <- ggplot() +
annotate(x = 1, y = 1, label = typical, geom = "text", size = 6) +
labs(title = '"Typical" Parameterization') +
theme_void()
bholt <- growthFunShow(param = "BevertonHolt", "vonBertalanffy")
bholt_plot <- ggplot() +
annotate(x = 1, y = 1, label = bholt, geom = "text", size = 6) +
labs(title = '"Beverton-Holt" Parameterization') +
theme_void()
typical_plot | bholt_plot
To start things off we first set the von Bert parameterization to solve using nonlinear least squares: nls()
( vb <- vbFuns(param="Typical") )
function (t, Linf, K = NULL, t0 = NULL)
{
if (length(Linf) == 3) {
K <- Linf[[2]]
t0 <- Linf[[3]]
Linf <- Linf[[1]]
}
Linf * (1 - exp(-K * (t - t0)))
}
<bytecode: 0x7fba423122d0>
<environment: 0x7fba423487d0>
Then once we have set that as the function we want to use, we can write a wrapper around it that will take input length and age data, some reasonable parameter starting points, and some minimum number of observations that we set to make sure groups with very few aged fish don’t break the workflow.
# Function to Pull Vonbert Coefficients
vonbert_coef <- function(length_age_dat, start_points, min_obs = 20){
# Von Bert Fitting Using: {FSA}
# Don't run on fewer than a minimum number of obervations
num_aged <- nrow(length_age_dat)
if(num_aged < min_obs){
na_df <- data.frame("Linf" = NA, "K" = NA, "t0" = NA, "n_aged" = num_aged,
"len_age_1" = NA, "len_age_2" = NA, "len_age_3" = NA)
return(na_df)
}
# Use nls to estimate VBGF parameters using starting points
vbert_fit <- nls(length_cm ~ vb(age, Linf, K, t0),
data = length_age_dat,
start = start_points)
# Access parameters with coef()
# Estimate sizes at age
vbert_coef <- as.data.frame(t(coef(vbert_fit))) %>%
mutate(n_aged = num_aged,
len_age_1 = Linf * (1 - exp(-1 * K * (1 - t0))),
len_age_2 = Linf * (1 - exp(-1 * K * (2 - t0))),
len_age_3 = Linf * (1 - exp(-1 * K * (3 - t0))))
return(vbert_coef)
}
This next function is a little redundant, but makes it easier to use purrr::map and repeat the estimations for each species:
# Function for running that for a single Species
species_vonbert <- function(comname, split_col, min_obs){
# Get starting points from all data
test_starts <- vbStarts(length_cm ~ age, data = species_data[[comname]])
# Get coefficients for groups
species_data[[comname]] %>%
split(.[split_col]) %>%
map_dfr(vonbert_coef, test_starts, min_obs = min_obs, .id = split_col) %>%
mutate(comname = comname)
}
Once everything is prepped we can then fun the von Bert parameter estimation for all the species we are interested using the desired groups.
# Running everything?
species_coef <- vonbert_species[c(1:17)] %>%
map_dfr(species_vonbert, split_col = "five_yr_group", min_obs = 30)
Since the table of coefficients is somewhat simple here is a function to select a species, the x_column to use (corresponding to the time grouping), and the name of the coefficient to plot.
# pick species and coefficients
plot_vonbert_coef <- function(comnames, x_col, coef_name){
x_col_sym <- sym(x_col)
coef_sym <- sym(coef_name)
coef_label <- switch (
coef_name,
Linf = "Asymptotic Max Length (cm)",
K = "Body Growth Coefficient (K)",
t0 = "Hypothetical Age of Zero Size (t0)",
n_aged = "Number Aged",
len_age_1 = "Age 1 length (cm)",
len_age_2 = "Age 2 length (cm)",
len_age_3 = "Age 3 length (cm)")
species_coef %>%
filter(comname %in% comnames) %>%
ggplot(aes(y = {{coef_sym}}, x = {{x_col_sym}})) +
geom_line(group = 1, linetype = 3) +
geom_point(size = 0.8) +
scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
facet_wrap(~comname, ncol = 1, scales = "free") +
labs(x = "Date Period", y = coef_label)
}
Using that function we can now take a look at the coefficients for any given species/coefficient combination.
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "Linf")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "K")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "t0")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "n_aged")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_1")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_2")
plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"),
x_col = "five_yr_group",
coef_name = "len_age_3")
# Plotting age distribution
plot_age_bubbleplot <- function(species){
age_bubble <- species_data[[species]] %>%
count(comname, est_year, age) %>%
ggplot(aes(x = est_year, y = age, size = n)) +
geom_point(shape = 21, color = "white", fill = gmri_cols("gmri blue")) +
facet_wrap(~comname) +
labs(x = "Year", y = "Age", size = "Number Measured") +
guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
size_bubble <- species_data[[species]] %>%
count(comname, est_year, age, length_cm) %>%
ggplot(aes(x = est_year, y = length_cm)) +
geom_jitter(aes(color = age), height = 0, width = 0.3, alpha = 0.75) +
scale_color_gmri(discrete = FALSE) +
facet_wrap(~comname) +
labs(x = "Year", y = "Length (cm)", color = "Age") +
guides(color = guide_colorbar(title.position = "top", title.hjust = 0.5))
figs <- age_bubble / size_bubble
return(figs)
}
# Process Each species
bubble_plots <- map(.x = vonbert_species, .f = plot_age_bubbleplot)
bubble_plots[["atlantic cod"]]
bubble_plots[["haddock"]]
bubble_plots[["acadian redfish"]]
bubble_plots[["winter flounder"]]
bubble_plots[["haddock"]]
# Plotting age distribution as ggridges
plot_age_ridgeplot <- function(species){
species_data[[species]] %>%
mutate(yr = factor(est_year),
yr = fct_rev(yr)) %>%
ggplot(aes(x = age, y = yr)) +
geom_density_ridges(fill = gmri_cols("gmri blue"), color = "white") +
facet_wrap(~comname) +
scale_x_continuous(limits = c(0,NA)) +
labs(x = "Age", y = "Year") +
guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
}
# Process Each species
ridge_plots <- map(vonbert_species, plot_age_ridgeplot)
ridge_plots[["atlantic cod"]]
ridge_plots[["haddock"]]
ridge_plots[["acadian redfish"]]
ridge_plots[["winter flounder"]]
ridge_plots[["haddock"]]
Fish born as part of the same cohort are likely to exhibit similar patterns in growth due to their exposure to similar environments at the same points in their development.
For this reason and others it may be meaningful to capture that cohort effect when tracking changes in growth coefficients.
One way to do this is with mixed effects models.
test_species <- species_data$`acadian redfish` %>%
mutate(
age = as.numeric(age),
year = factor(est_year),
yearclass = factor(yearclass)
)
# Function used earlier (no random effect)
vb
# Mean Function for nlme model, includes cohort year "yearclass"
vbert_form <- ~ Linf * (1 - exp(-K * (t - t0))) + (1 | yearclass)
# will add random effect into nlme: + (1 | yearclass)
# use derive to construct function
nfun <- deriv(expr = vbert_form,
namevec = c("Linf","K", "t0"),
function.arg = c("Linf","K", "t0"))
# Set starting vector
# Get starting points from all data
test_starts <- vbStarts(length_cm ~ as.numeric(age), data = test_species)
# Play around until it runs
nlmer(length_cm ~ nfun(age, Linf, K, t0) ~ (1 | yearclass),
data = test_species,
start = test_starts)
For average age and average size by species the stratified abundances are used. Length is freely available, whereas age must be estimated by length.